Forecasts are recursively computed
Forecasts from covariance stationary time series are mean reverting
# Fit an AR(1) with a constant (order=(1, 0, 0), trend="c") to m2_growth.
res = SARIMAX(m2_growth, order=(1, 0, 0), trend="c").fit()
# `summary` is a helper defined outside this chunk; the table that follows is its output.
summary(res)
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| intercept | 0.0021 | 0.000 | 11.315 | 0.000 | 0.002 | 0.002 |
| ar.L1 | 0.6326 | 0.010 | 62.755 | 0.000 | 0.613 | 0.652 |
| sigma2 | 1.221e-05 | 1.85e-07 | 65.856 | 0.000 | 1.18e-05 | 1.26e-05 |
# Out-of-sample forecasts for the 12 periods following the estimation sample.
fcast = res.forecast(12)
plot(fcast, y=-2)
# Rebuild the multi-step forecast path by hand from the fitted AR(1)
# parameters: every step is intercept + ar.L1 * (previous value), seeded with
# the last observed value of m2_growth. The result should match `fcast`.
p0 = res.params["intercept"]
p1 = res.params["ar.L1"]
path = []
prev = m2_growth.iloc[-1]
for _ in range(len(fcast)):
    prev = p0 + p1 * prev
    path.append(prev)
# Build the Series in one go instead of writing direct[i]: integer keys on a
# date-indexed Series are label lookups, not positions, in current pandas.
direct = pd.Series(path, index=fcast.index)
plot(direct)
# One-step-ahead forecasts over the second half of the sample.
# The AR(1) is estimated once on the first half; the loop then feeds in one
# new observation at a time and records the forecast for the next period.
t = m2_growth.shape[0]
half = t // 2
forecasts = []
res = SARIMAX(m2_growth.iloc[:half], order=(1, 0, 0), trend="c").fit()
forecasts.append(res.forecast(1))
for i in range(half, t):
    # extend() builds a results object for the new observation that reuses
    # the already-fitted parameters (no re-estimation).
    res = res.extend(m2_growth.iloc[i : i + 1])
    forecasts.append(res.forecast(1))
# Each element is a length-1 Series indexed by its target date.
h1 = pd.concat(forecasts)
h1.name = "h1"
# Extend using the second half and use get_prediction(): a single extend()
# over the whole second half yields every one-step prediction at once,
# replacing the observation-by-observation loop used to build h1.
res2 = SARIMAX(m2_growth.iloc[:half], order=(1, 0, 0), trend="c").fit()
res2 = res2.extend(m2_growth.iloc[half:])
fast_h1 = res2.get_prediction().predicted_mean
fast_h1.name = "fast_h1"
# Side-by-side check that both approaches agree.
pd.concat([h1, fast_h1], axis=1).head()
| h1 | fast_h1 | |
|---|---|---|
| 1990-06-01 | 0.001715 | 0.001715 |
| 1990-07-01 | 0.004789 | 0.004789 |
| 1990-08-01 | 0.004279 | 0.004279 |
| 1990-09-01 | 0.005718 | 0.005718 |
| 1990-10-01 | 0.004645 | 0.004645 |
# Overlay the one-step forecasts and the realized values on a single axis.
fig, ax = plt.subplots(1, 1)
h1.plot(ax=ax)
_ = realizations.plot(ax=ax)
# One-step forecast errors (realization minus forecast); dropna() removes
# dates where either series is missing after index alignment.
fe = (realizations - h1).dropna()
plot(fe, 13)
acf_pacf_plot(fe, 24, size=-2)
multistep_forecast_plot()
plot(fe_12)
acf_pacf_plot(fe_12, 24, size=13)
# Regress the realizations on a constant and the one-step forecast h1
# (the `mz` name suggests a Mincer-Zarnowitz regression).
mz = sm.OLS(realizations, sm.add_constant(combined["h1"])).fit()
summary(mz)
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.0004 | 0.000 | 0.936 | 0.350 | -0.000 | 0.001 |
| h1 | 0.8481 | 0.061 | 13.985 | 0.000 | 0.729 | 0.967 |
mz.tvalues["const"]
0.9362189787611457
(mz.params["h1"] - 1) / mz.bse["h1"]
-2.504731229244801
# Joint Wald test of const = 0 and h1 = 1: the restriction matrix is the 2x2
# identity and the target vector is [0, 1]. use_f=False requests the
# chi-square form of the statistic rather than the F form.
stat = mz.wald_test((np.eye(2), [0, 1]), use_f=False)
stat
<class 'statsmodels.stats.contrast.ContrastResults'> <Wald test (chi2): statistic=[[9.93955905]], p-value=0.006944678984617448, df_denom=2>
mz_plot()
# Same regressors as `mz`, but with the forecast error combined.fe as the
# dependent variable, so the h1 coefficient is tested against 0 directly.
mz2 = sm.OLS(combined.fe, sm.add_constant(combined.h1)).fit()
summary(mz2)
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.0004 | 0.000 | 0.936 | 0.350 | -0.000 | 0.001 |
| h1 | -0.1519 | 0.061 | -2.505 | 0.013 | -0.271 | -0.033 |
plot(combined[["h1", "recursive", "rolling", "realizations"]].iloc[:-36], y=13)
# Random-walk (no-change) benchmark: the forecast for period t is the value
# observed in period t - 1, so the series must be lagged with shift(1).
# A negative shift would place the value from t + 1 at date t and give the
# benchmark look-ahead information.
# Assumes `combined` is indexed by the forecast target date, as h1 is.
rw = m2_growth.shift(1)
rw.name = "rw"
combined = pd.concat([combined, rw], axis=1).dropna()
# Squared-error losses of the model forecast (l_a) and the benchmark (l_b).
l_a = (combined.realizations - combined.h1) ** 2
l_b = (combined.realizations - combined.rw) ** 2
print(l_a.mean(), l_b.mean())
# Loss differential; negative values favour h1 over the random walk.
delta = l_a - l_b
1.947078781444988e-05 2.3835737833687686e-05
# Test whether the mean loss differential is zero by regressing delta on a
# constant only, with HAC standard errors. The lag truncation is the integer
# part of the cube root of t, where t is the full-sample length computed
# earlier (not the length of delta).
mod = sm.OLS(delta, np.ones_like(delta))
dm_res = mod.fit(cov_type="HAC", cov_kwds={"maxlags": int(t ** (1 / 3))})
summary(dm_res)
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | -4.365e-06 | 2.16e-06 | -2.017 | 0.044 | -8.61e-06 | -1.24e-07 |
plot(delta)
# Repeat the comparison with absolute-error losses instead of squared errors.
la = np.abs(combined.realizations - combined.h1)
lb = np.abs(combined.realizations - combined.rw)
# NOTE: this rebinds `delta`, replacing the squared-error differential.
delta = la - lb
# Constant-only regression with HAC standard errors, same lag rule as above.
mod = sm.OLS(delta, np.ones_like(delta))
dm_mae_res = mod.fit(cov_type="HAC", cov_kwds={"maxlags": int(t ** (1 / 3))})
summary(dm_mae_res)
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | -0.0003 | 0.000 | -2.358 | 0.018 | -0.000 | -4.27e-05 |
acf_pacf_plot(delta, 24, size=13)
plot(vix)
print("Splits:")
print(splits)
print("\nOOS SSE:")
oos_sse
Splits: [146 292 438 584 730] OOS SSE:
1 16.848481 2 16.647000 3 16.540600 4 16.426092 5 16.019054 6 16.090477 7 16.052844 8 16.050795 9 16.040110 10 16.056220 11 16.116910 12 16.123724 dtype: float64
plot(oos_sse)
plot_sse_ic()
plot(housing, y=-2)
acf_pacf_plot(housing, 24, size=-2)
plot(housing_yoy, y=12)
acf_pacf_plot(housing_yoy, 24)
# AR(1) for housing_yoy; no trend argument is passed.
res = SARIMAX(housing_yoy, order=(1, 0, 0)).fit()
# Drop the first residual before inspecting the residual autocorrelations.
resids = res.resid.iloc[1:]
acf_pacf_plot(resids, 24, size=-2)
# AR(2) for housing_yoy plus a seasonal MA(1) term with period 12.
res = SARIMAX(housing_yoy, order=(2, 0, 0), seasonal_order=(0, 0, 1, 12)).fit()
summary(res)
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| ar.L1 | 0.6809 | 0.034 | 20.284 | 0.000 | 0.615 | 0.747 |
| ar.L2 | 0.2824 | 0.034 | 8.233 | 0.000 | 0.215 | 0.350 |
| ma.S.L12 | -0.8795 | 0.017 | -50.520 | 0.000 | -0.914 | -0.845 |
| sigma2 | 0.0083 | 0.000 | 21.791 | 0.000 | 0.008 | 0.009 |
acf_pacf_plot(res.resid.iloc[13:], 24)
# ARMA(1, 1) for housing_raw with seasonal AR(1) and MA(1) terms (period 12)
# and no differencing.
res = SARIMAX(housing_raw, order=(1, 0, 1), seasonal_order=(1, 0, 1, 12)).fit()
summary(res)
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| ar.L1 | 0.9994 | 0.001 | 1449.669 | 0.000 | 0.998 | 1.001 |
| ma.L1 | -0.3196 | 0.031 | -10.258 | 0.000 | -0.381 | -0.259 |
| ar.S.L12 | 0.9987 | 0.001 | 1455.660 | 0.000 | 0.997 | 1.000 |
| ma.S.L12 | -0.8956 | 0.016 | -54.434 | 0.000 | -0.928 | -0.863 |
| sigma2 | 0.0083 | 0.000 | 22.227 | 0.000 | 0.008 | 0.009 |
acf_pacf_plot(res.resid.iloc[13:], 24)
# ARMA(1, 1) for housing_raw with one seasonal difference (period 12) and a
# seasonal MA(1) term; the seasonal AR term is replaced by the difference.
res = SARIMAX(housing_raw, order=(1, 0, 1), seasonal_order=(0, 1, 1, 12)).fit()
summary(res)
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| ar.L1 | 0.9778 | 0.008 | 126.962 | 0.000 | 0.963 | 0.993 |
| ma.L1 | -0.3128 | 0.033 | -9.354 | 0.000 | -0.378 | -0.247 |
| ma.S.L12 | -0.8774 | 0.018 | -48.040 | 0.000 | -0.913 | -0.842 |
| sigma2 | 0.0083 | 0.000 | 21.771 | 0.000 | 0.008 | 0.009 |
acf_pacf_plot(res.resid.iloc[13:], 24)
Set both differences to 1
Model is $(1-L)(1-L^{12})Y_t = (1+\theta_1 L)(1+\theta_s L^{12})\epsilon_t$
# One regular and one seasonal (period 12) difference, with an MA(1) term and
# a seasonal MA(1) term.
res = SARIMAX(housing_raw, order=(0, 1, 1), seasonal_order=(0, 1, 1, 12)).fit()
summary(res)
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| ma.L1 | -0.3246 | 0.031 | -10.361 | 0.000 | -0.386 | -0.263 |
| ma.S.L12 | -0.8774 | 0.019 | -47.049 | 0.000 | -0.914 | -0.841 |
| sigma2 | 0.0084 | 0.000 | 22.025 | 0.000 | 0.008 | 0.009 |
acf_pacf_plot(res.resid.iloc[13:], 24)
# Monthly seasonal dummies for housing_raw.
# "%b" yields the locale's abbreviated month name; the column list below
# assumes English abbreviations.
month = housing_raw.index.strftime("%b")
# Pin the dtype: pandas >= 2.0 returns boolean columns by default, whereas the
# 0/1 integer indicators shown below (and used as regressors) correspond to
# the earlier uint8 default.
dummies = pd.get_dummies(month, dtype="uint8")
# Keep Feb-Dec in calendar order; January is left out of the column list.
dummies = dummies[
    ["Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
]
# get_dummies on the month labels does not carry the date index; restore it so
# the dummies align with housing_raw.
dummies.index = housing_raw.index
dummies.head()
| Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1960-01-01 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1960-02-01 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1960-03-01 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1960-04-01 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1960-05-01 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
# AR(2) on the first difference of housing_raw (order=(2, 1, 0)) with the
# monthly dummies as exogenous regressors and trend="c".
res = SARIMAX(housing_raw, order=(2, 1, 0), exog=dummies, trend="c").fit()
summary(res)
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| intercept | 0.0002 | 0.004 | 0.046 | 0.963 | -0.007 | 0.008 |
| Feb | 0.0358 | 0.012 | 2.965 | 0.003 | 0.012 | 0.059 |
| Mar | 0.3075 | 0.012 | 24.776 | 0.000 | 0.283 | 0.332 |
| Apr | 0.4289 | 0.015 | 29.517 | 0.000 | 0.400 | 0.457 |
| May | 0.4669 | 0.018 | 26.261 | 0.000 | 0.432 | 0.502 |
| Jun | 0.4697 | 0.019 | 25.309 | 0.000 | 0.433 | 0.506 |
| Jul | 0.4328 | 0.019 | 23.266 | 0.000 | 0.396 | 0.469 |
| Aug | 0.4117 | 0.019 | 22.228 | 0.000 | 0.375 | 0.448 |
| Sep | 0.3657 | 0.017 | 21.803 | 0.000 | 0.333 | 0.399 |
| Oct | 0.3921 | 0.015 | 26.254 | 0.000 | 0.363 | 0.421 |
| Nov | 0.2170 | 0.013 | 16.944 | 0.000 | 0.192 | 0.242 |
| Dec | 0.0502 | 0.010 | 5.242 | 0.000 | 0.031 | 0.069 |
| ar.L1 | -0.2675 | 0.033 | -8.115 | 0.000 | -0.332 | -0.203 |
| ar.L2 | -0.1107 | 0.034 | -3.277 | 0.001 | -0.177 | -0.045 |
| sigma2 | 0.0087 | 0.000 | 21.345 | 0.000 | 0.008 | 0.010 |
# Drop the first two residuals before plotting and checking autocorrelations.
resid = res.resid.iloc[2:]
plot(resid, y=-2)
acf_pacf_plot(resid, 24)
adf_cv_plot()
from arch.unitroot import ADF
adf = ADF(default)
adf.summary()
| Test Statistic | -3.866 |
| P-value | 0.002 |
| Lags | 10 |
adf = ADF(curve)
adf.summary()
| Test Statistic | -4.412 |
| P-value | 0.000 |
| Lags | 19 |
# ADF test on the log of orig.INDPRO with a constant in the test regression.
adf = ADF(np.log(orig.INDPRO), trend="c")
adf.summary()
| Test Statistic | -2.186 |
| P-value | 0.211 |
| Lags | 4 |
summary(adf.regression)
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| Level.L1 | -0.0017 | 0.001 | -2.186 | 0.029 | -0.003 | -0.000 |
| Diff.L1 | 0.3637 | 0.037 | 9.860 | 0.000 | 0.291 | 0.436 |
| Diff.L2 | -0.1110 | 0.039 | -2.832 | 0.005 | -0.188 | -0.034 |
| Diff.L3 | 0.0447 | 0.039 | 1.138 | 0.255 | -0.032 | 0.122 |
| Diff.L4 | 0.0217 | 0.037 | 0.589 | 0.556 | -0.051 | 0.094 |
| const | 0.0083 | 0.003 | 2.579 | 0.010 | 0.002 | 0.015 |
# Same test with trend="ct" (constant and linear time trend).
adf = ADF(np.log(orig.INDPRO), trend="ct")
adf
| Test Statistic | -1.831 |
| P-value | 0.690 |
| Lags | 6 |
# First difference of log INDPRO; dropna() removes the leading NaN that
# diff() creates. The ADF test is then run on the differenced series.
delta_indpro = np.log(orig.INDPRO).diff().dropna()
adf = ADF(delta_indpro, trend="c")
adf
| Test Statistic | -11.945 |
| P-value | 0.000 |
| Lags | 3 |
plot(trending, 13)
ADF(trending, trend="n")
| Test Statistic | 1.934 |
| P-value | 0.988 |
| Lags | 9 |
ADF(trending, trend="c")
| Test Statistic | -1.146 |
| P-value | 0.696 |
| Lags | 9 |
ADF(trending, trend="ct")
| Test Statistic | -6.790 |
| P-value | 0.000 |
| Lags | 0 |
# NOTE(review): the three preceding ADF calls use `trending`, while this one
# uses `y`, which is not defined in this section. Confirm `y` refers to the
# same series.
ADF(y, trend="ctt")
| Test Statistic | -6.885 |
| P-value | 0.000 |
| Lags | 0 |
# AR(1) for `trending` with trend="ct" (constant and linear time trend).
res = SARIMAX(trending, order=(1, 0, 0), trend="ct").fit()
summary(res)
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| intercept | 0.0796 | 0.116 | 0.687 | 0.492 | -0.148 | 0.307 |
| drift | 0.0255 | 0.004 | 6.149 | 0.000 | 0.017 | 0.034 |
| ar.L1 | 0.6822 | 0.050 | 13.534 | 0.000 | 0.583 | 0.781 |
| sigma2 | 0.9213 | 0.080 | 11.451 | 0.000 | 0.764 | 1.079 |
plot(res.resid.iloc[1:])